---
title: "Cleaning Arab Barometer"
---

# Load

```{r}
# run the shared helper script first
# (presumably attaches the packages used below, e.g. the one providing read_dta -- confirm)
  source("helper-packages.R")

# Raw Arab Barometer Stata files, one object per wave.
# Paths are relative to this document's folder.

# wave 1
  arabb_1_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-i/ABI_English.dta"
  )

# wave 2
  arabb_2_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-ii/ABII_English.dta"
  )

# wave 3 (the only file read with an explicit latin1 encoding)
  arabb_3_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-iii/ABIII_English.dta",
    encoding = "latin1"
  )

# wave 4
  arabb_4_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-iv/ABIV_English.dta"
  )

# wave 5
  arabb_5_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-v/ABV_Release_Data.dta"
  )

# wave 6
# NOTE(review): the "Baometer" spelling in the folder name presumably matches
# the directory on disk -- confirm before "correcting" it
  arabb_6_raw <- read_dta(
    "../raw-data/y-multi-arab-barometer/arab-barometer-vi/Arab Baometer Wave VI Part 3_NOV/Arab_Barometer_Wave_6_Part_3_ENG_RELEASE.dta"
  )
```

# Clean Round 1

```{r}
# dates: one row per country with the wave-1 fieldwork window;
# joined onto the respondent data by resp_country_common
  abb1_dates <- 
    tribble(
      ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
      "Algeria",   "2006-06-01", "2006-06-30",
      "Bahrain",   "2009-01-01", "2009-05-31",
      "Jordan",    "2006-06-08", "2006-06-16",
      "Lebanon",   "2006-11-01", "2006-11-30",
      "Morocco",   "2007-01-01", "2007-12-31",
      "Palestine", "2006-05-18", "2006-05-20",
      "Yemen",     "2007-11-01", "2007-12-31") %>% 
    mutate(
      # build the join key the same way the respondent-level key is built, so a
      # name that countryname() rewrites still matches
      # (NOTE(review): "Palestine" appears to become "Palestinian Territories" -- confirm)
      resp_country_common = countryname(resp_country_common),
      # ISO 8601 literals parse the same in every locale; the previous "%b."
      # month abbreviations only parse when LC_TIME is English
      resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
      resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean: derive the harmonised resp_* variables for wave 1 and keep only those columns
  clean_ab1 <- 
    arabb_1_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Arab Barometer",
        
      # round number (character vector, title case)  
        resp_round = "Wave 1",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3sYWE4L",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(country),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # set to NA for every respondent in this wave; only the country-level
      # start/end dates joined below are filled in
        resp_interview_date = as.Date(NA_character_)) %>% 
      # add resp_interview_start_date / resp_interview_end_date per country
        left_join(
          abb1_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes 1, 3 and 4 all collapse to "Muslim"; anything unlisted becomes NA
        resp_religion = 
          dplyr::recode(
            as.character(q711),
            "1" = "Muslim",
            "2" = "Christian",
            "3" = "Muslim",
            "4" = "Muslim",
            "5" = "Other religion",
            .default = NA_character_),      

      # respondent's denomination (character vector; value labels of q711 as given in the source)
        resp_denomination = to_character(q711),   
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"]) 
      # exact age (q701) is used when present; otherwise fall back to the binned
      # variable. Rows missing on both stay NA (no case matches).
        resp_age = case_when(
          !is.na(q701) ~ as.character(q701),
          is.na(q701) & q701agecategories == 1 ~ "18-24",
          is.na(q701) & q701agecategories == 2 ~ "25-34",
          is.na(q701) & q701agecategories == 3 ~ "35-44",
          is.na(q701) & q701agecategories == 4 ~ "45-54",
          is.na(q701) & q701agecategories == 5 ~ "55-64",
          is.na(q701) & q701agecategories == 6 ~ "65-74",
          is.na(q701) & q701agecategories == 7 ~ "75+"),
      
      # respondent's education level
      # (original category followed by the harmonised level in square brackets)
        resp_education_original =
          dplyr::recode(
            as.character(q703),
            "1" = "1. Illiterate [No education]",
            "2" = "2. Elementary [No education]", # implies not primary
            "3" = "3. Primary [Primary]",
            "4" = "4. Secondary [Primary]",
            "5" = "5. College Diploma - two years [Primary]",
            "6" = "6. BA [College]",
            "7" = "7. MA or higher [College]",
            .default = NA_character_),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # explicit mapping so that any code other than 1/2 becomes NA instead of
      # being silently counted as male
      # NOTE(review): assumes raw code 1 = male -- confirm against the codebook
        resp_female = 
          case_when(
            q702 == 2 ~ 1,
            q702 == 1 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # left entirely missing for this wave (no source variable is used here)
        resp_rural = NA_real_,
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: q3031; QTEXT: Which of the following groups do you wish to have as neighbors? Followers of other religions.; ROPTIONS: 1 = I do not wish [=1] + 2 = I do not mind [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
      # codes 98/99/100 are blanked; all other codes pass through unchanged
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(q3031),
            "98" = NA_character_,
            "99" = NA_character_,
            "100" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            q3031 %in% c(1) ~ 1,
            q3031 %in% c(2) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: q204; QTEXT: Generally speaking, would you say that most people can be trusted?; ROPTIONS: 1 = Most people can be trusted + 2 = You must be very careful in dealing with people",
    
      # original response (as character vector)
        resp_gentrust_original = dplyr::recode(
            as.character(q204),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # i.e. 1 = distrusting answer, 0 = trusting answer
        resp_gentrust_bin_recode = case_when(
            q204 %in% c(1) ~ 0,
            q204 %in% c(2) ~ 1,
            TRUE ~ NA_real_),  
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: q714a; QTEXT: In general, would you describe yourself as...;  ROPTIONS: 1 = Religious + 2 = In between + 3 = Not religious + 4 = Other",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # "Other" (4) is blanked along with 97/98/99 so only 1-3 remain
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(q714a),
            "4" = NA_real_,
            "97" = NA_real_,
            "98" = NA_real_,
            "99" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # maps 1 -> 1, 2 -> 0.5, 3 -> 0
        resp_religiosity_recode = (3 - resp_religiosity_original)/2
    
    ) %>% 
    # keep only the harmonised variables
    select(starts_with("resp_"))
```

# Clean Round 2

```{r}
# dates: one row per country x sample type with the wave-2 fieldwork window;
# joined onto the respondent data by resp_country_common and sample_type
# NOTE(review): the Saudi Arabia and Yemen rows carry identical dates -- confirm
# against the technical report that this is not a copy/paste slip
  abb2_dates <- 
      tribble(
        ~resp_country_common, ~sample_type, ~resp_interview_start_date, ~resp_interview_end_date,
        "Algeria",      "Main sample",       "2011-04-15", "2011-05-11",
        "Egypt",        "Main sample",       "2011-06-16", "2011-06-30",
        "Iraq",         "Main sample",       "2011-02-20", "2011-03-12",
        "Jordan",       "Main sample",       "2010-12-10", "2010-12-16",
        "Lebanon",      "Main sample",       "2010-11-24", "2010-12-06",
        "Lebanon",      "Additional sample", "2011-04-09", "2011-04-24",
        "Palestine",    "Main sample",       "2010-12-02", "2010-12-05",
        "Saudi Arabia", "Main sample",       "2011-01-05", "2011-02-06",
        "Saudi Arabia", "Additional sample", "2011-03-26", "2011-04-09",
        "Sudan",        "Main sample",       "2010-12-12", "2010-12-30",
        "Sudan",        "Additional sample", "2011-03-24", "2011-04-23",
        "Tunisia",      "Main sample",       "2011-09-30", "2011-10-11",
        "Yemen",        "Main sample",       "2011-01-05", "2011-02-06",
        "Yemen",        "Additional sample", "2011-03-26", "2011-04-09") %>% 
      mutate(
        # build the join key the same way the respondent-level key is built, so a
        # name that countryname() rewrites still matches
        # (NOTE(review): "Palestine" appears to become "Palestinian Territories" -- confirm)
        resp_country_common = countryname(resp_country_common),
        # ISO 8601 literals parse the same in every locale; the previous "%b."
        # month abbreviations only parse when LC_TIME is English
        resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
        resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean: derive the harmonised resp_* variables for wave 2 and keep only those columns
  clean_ab2 <- 
    arabb_2_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Arab Barometer",
        
      # round number (character vector, title case)  
        resp_round = "Wave 2",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3sYWE4L",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
      # numeric country codes are spelled out by hand; unlisted codes keep their code as text
        resp_country_original = 
          dplyr::recode(
            as.character(country),
            "1" = "Algeria",
            "5" = "Egypt",
            "7" = "Iraq",
            "8" = "Jordan",
            "10" = "Lebanon",
            "15" = "Palestine",
            "17" = "Saudi Arabia",
            "19" = "Sudan",
            "21" = "Tunisia",
            "22" = "Yemen"),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # sample information (second join key for the dates table; dropped by the final select)
        sample_type = dplyr::recode(
            as.character(samp),
            "1" = "Main sample",
            "2" = "Additional sample"),
      
      # interview date (variable of class Date; if only month given, input 1st of month)
      # set to NA for every respondent in this wave; only the start/end dates
      # joined below are filled in
        resp_interview_date = as.Date(NA_character_)) %>% 
      # add resp_interview_start_date / resp_interview_end_date per country and sample
        left_join(
          abb2_dates, by = c("resp_country_common", "sample_type")) %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's denomination (character vector; value labels as given in the source)
      # both items are converted to label text first; sa1012 is used where q1012 is missing
        q1012 = to_character(q1012),
        sa1012 = to_character(sa1012),
        resp_denomination = 
          case_when(
            is.na(q1012) ~ sa1012,
            TRUE ~ q1012),
        
      # respondent's religion (character vector that corresponds to master list)
      # derived from the denomination label; labels not listed here stay NA
        resp_religion = 
          case_when(
            resp_denomination %in% c("2. christian") ~ "Christian",
            resp_denomination %in% c("1. muslim", "8001. hanbali", "8002. sunni", "8003. shafi'i", "8004. shi'ite", "8005. jaafari") ~ "Muslim",
            resp_denomination %in% c("10001. jewish") ~ "Jewish",
            resp_denomination %in% c("3001. khaki") ~ "Other religion"),    

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        resp_age = 
          dplyr::recode(
            as.character(q1001),
            "0" = NA_character_,
            "994" = NA_character_,
            "998" = NA_character_,
            "999" = NA_character_),    
      
      # respondent's education level
      # country code 21 (Tunisia in the recode above) uses its own six-category
      # item t1003; every other country uses the seven-category q1003
        resp_education_original = 
          case_when(
            country == 21 & t1003 == 1 ~ "1. Illiterate/Literate [No education]",
            country == 21 & t1003 == 2 ~ "2. Elementary [No education]", # difficult; but seems to imply non-primary
            country == 21 & t1003 == 3 ~ "3. Preparatory/Basic [Primary]",
            country == 21 & t1003 == 4 ~ "4. Secondary [Primary]",
            country == 21 & t1003 == 5 ~ "5. BA [College]",
            country == 21 & t1003 == 6 ~ "6. MA and above [College]",
            
            country != 21 & q1003 == 1 ~ "1. Illiterate/Literate [No education]",
            country != 21 & q1003 == 2 ~ "2. Elementary [No education]", # difficult; but seems to imply non-primary 
            country != 21 & q1003 == 3 ~ "3. Preparatory/Basic [Primary]",
            country != 21 & q1003 == 4 ~ "4. Secondary [Primary]",
            country != 21 & q1003 == 5 ~ "5. Mid-level diploma/professional or technical [Primary]",
            country != 21 & q1003 == 6 ~ "6. BA [College]",
            country != 21 & q1003 == 7 ~ "7. MA and above [College]"),
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # explicit mapping so that any code other than 1/2 becomes NA instead of
      # being silently counted as male
      # NOTE(review): assumes raw code 1 = male -- confirm against the codebook
        resp_female = 
          case_when(
            q1002 == 2 ~ 1,
            q1002 == 1 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # v13 takes precedence; q13 is only consulted when v13 yields no match
        resp_rural =
          case_when(
            v13 == 2 ~ 1,
            v13 %in% c(1,3) ~ 0,
            q13 == 2 ~ 1,
            q13 == 1 ~ 0,
            TRUE ~ NA_real_),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: q6021; QTEXT: Members of which of the following groups would you not like to have as neighbors? Followers of other religions.; ROPTIONS: 1 = I do not want them to be my neighbors [=1] + 2 = I do not object [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(q6021),
            "8" = NA_character_,
            "9" = NA_character_,
            "0" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            q6021 %in% c(1) ~ 1,
            q6021 %in% c(2) ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: q602 [ir6026]; QTEXT: Members of which of the following groups would you not like to have as neighbors? Those who belong to religious sects other than yours.; ROPTIONS: 1 = I do not want them to be my neighbors [=1] + 2 = I do not object [=0]; TARGET: Different sect, general; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(ir6026),
            "8" = NA_character_,
            "9" = NA_character_,
            "0" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            ir6026 %in% c(1) ~ 1,
            ir6026 %in% c(2) ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 3 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_3_qinfo = "NUM: q604 [q6049]; QTEXT: To what extent do you consider the following factors obstacles to accepting your son/daughter/sister/brother's marriage? From a different religion or denomination.; ROPTIONS: 1 = Constitutes an obstacle to a great extent [=1] + 2 = Constitutes an obstacle to a medium extent [=1] + 3 = Constitutes an obstacle to a limited extent [=0] + 4 = Does not constitute an obstacle whatsoever [=0]; TARGET: Different religion; TYPE: Distance, family",
      
      # original response (as character vector)
        resp_soc_dist_3_original = 
          dplyr::recode(
            as.character(q6049),
            "8" = NA_character_,
            "9" = NA_character_,
            "0" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_3_bin_recode = 
          case_when(
            q6049 %in% c(1:2) ~ 1,
            q6049 %in% c(3:4) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: q103; QTEXT: Generally speaking, do you think most people are trustworthy or not?; ROPTIONS: 1 = Most people are trustworthy [=0] + 2 = Most people are not trustworthy [=1]",
    
      # original response (as character vector)
      # the other single-digit items in this wave treat 0/8/9 as non-substantive,
      # so those are blanked here too (98/99 kept for safety); only 1 and 2 are
      # substantive answers per the question info above
        resp_gentrust_original = dplyr::recode(
            as.character(q103),
            "0" = NA_character_,
            "8" = NA_character_,
            "9" = NA_character_,
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = case_when(
            q103 %in% c(1) ~ 0,
            q103 %in% c(2) ~ 1,
            TRUE ~ NA_real_),  
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: q609; QTEXT: Generally speaking, would you describe yourself as...;  ROPTIONS: 1 = Religious + 2 = Somewhat religious + 3 = Not religious + 4 = Other",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # 8 is blanked as well (it is non-substantive on the other single-digit
      # items of this wave); left in, it would fall outside the 0-1 scale below
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(q609),
            "0" = NA_real_,
            "4" = NA_real_,
            "8" = NA_real_,
            "9" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # maps 1 -> 1, 2 -> 0.5, 3 -> 0
        resp_religiosity_recode = (3 - resp_religiosity_original)/2
    
    ) %>% 
    # keep only the harmonised variables
    select(starts_with("resp_"))
```

# Clean Round 3

- NOT PROCESSED:
  - REASONS:
    - No question on social distance
  - NOTES:

# Clean Round 4

```{r}
# dates: one row per country with the wave-4 fieldwork window;
# joined onto the respondent data by resp_country_common
  abb4_dates <- 
      tribble(
        ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
        "Algeria",   "2016-05-03", "2016-05-16",
        "Egypt",     "2016-04-15", "2016-04-23",
        "Jordan",    "2016-03-09", "2016-03-16",
        "Lebanon",   "2016-07-20", "2016-08-16",
        "Morocco",   "2016-05-07", "2016-06-11",
        "Palestine", "2016-02-18", "2016-02-27",
        "Tunisia",   "2016-02-13", "2016-03-03") %>% 
      mutate(
        # build the join key the same way the respondent-level key is built, so a
        # name that countryname() rewrites still matches
        # (NOTE(review): "Palestine" appears to become "Palestinian Territories" -- confirm)
        resp_country_common = countryname(resp_country_common),
        # ISO 8601 literals parse the same in every locale; the previous "%b."
        # month abbreviations only parse when LC_TIME is English
        resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
        resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean: derive the harmonised resp_* variables for wave 4 and keep only those columns
  clean_ab4 <- 
    arabb_4_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Arab Barometer",
        
      # round number (character vector, title case)  
        resp_round = "Wave 4",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3sYWE4L",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
      # numeric country codes are spelled out by hand; unlisted codes keep their code as text
        resp_country_original = 
          dplyr::recode(
            as.character(country),
            "1" = "Algeria",
            "5" = "Egypt",
            "8" = "Jordan",
            "10" = "Lebanon",
            "13" = "Morocco",
            "15" = "Palestine",
            "21" = "Tunisia"),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
      
      # interview date (variable of class Date; if only month given, input 1st of month)
      # set to NA for every respondent in this wave; only the country-level
      # start/end dates joined below are filled in
        resp_interview_date = as.Date(NA_character_)) %>% 
      # add resp_interview_start_date / resp_interview_end_date per country
        left_join(
          abb4_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # "other" (3) is split using the write-in code in q1012other
        resp_religion = 
          case_when(
            q1012 == 1 ~ "Muslim",
            q1012 == 2 ~ "Christian",
            q1012 == 3 & q1012other == 10001 ~ "Other religion", # Druze
            q1012 == 3 & q1012other == 13001 ~ NA_character_,
            q1012 == 4 ~ "Jewish",
            TRUE ~ NA_character_),

      # respondent's denomination (character vector)
      # code 10 is disambiguated with q1012 / q1012other; unmatched rows stay NA
        resp_denomination = 
          case_when(
            q1012a == 1 ~ "Maronite",
            q1012a == 2 ~ "Orthodox",
            q1012a == 3 ~ "Catholic",
            q1012a == 4 ~ "Armenian",
            q1012a == 5 ~ "Sunni",
            q1012a == 6 ~ "Shia",
            q1012a == 7 ~ "Hanbali",
            q1012a == 8 ~ "Shafi'i",
            q1012a == 9 ~ "Ja'afari",
            q1012a == 10 & q1012 == 4 ~ "Jewish",
            q1012a == 10 & q1012 == 3 & q1012other == 10001 ~ "Druze",
            q1012a == 10 & q1012 == 3 & q1012other == 13001 ~ NA_character_,
            q1012a == 10 & q1012 != 4 & q1012 != 3 ~ "Other religion",
            q1012a == 11 ~ "Just a Muslim"), 
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        resp_age = 
          dplyr::recode(
            as.character(q1001),
            "999" = NA_character_), 
      
      # respondent's education level
      # q1003 (seven categories) is tried first; the six-category t1003 is only
      # reached when no q1003 case matches
        resp_education_original = case_when(
          q1003 == 1 ~ "1. Illiterate/No formal education [No education]",
          q1003 == 2 ~ "2. Elementary [No education]",
          q1003 == 3 ~ "3. Preparatory/Basic [Primary]",
          q1003 == 4 ~ "4. Secondary [Primary]",
          q1003 == 5 ~ "5. Mid-level diploma / professional or technical [Primary]",
          q1003 == 6 ~ "6. BA [College]",
          q1003 == 7 ~ "7. MA and above [College]",
          
          t1003 == 1 ~ "1. Illiterate/No formal education [No education]",
          t1003 == 2 ~ "2. Elementary [No education]",
          t1003 == 3 ~ "3. Preparatory/Basic [Primary]",
          t1003 == 4 ~ "4. Secondary [Primary]",
          t1003 == 5 ~ "5. BA [College]",
          t1003 == 6 ~ "6. MA and above [College]"),
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # explicit mapping so that any code other than 1/2 becomes NA instead of
      # being silently counted as male
      # NOTE(review): assumes raw code 1 = male -- confirm against the codebook
        resp_female = 
          case_when(
            q1002 == 2 ~ 1,
            q1002 == 1 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # every non-missing code other than 2 is coded 0
        resp_rural = (q13 == 2)*1, # refugee camp as urban
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: q6021; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care? People of a different religion.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(q6021),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            q6021 %in% c(1:2) ~ 1,
            q6021 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: q6024; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care?  People of a different sect of Islam.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different sect, Muslim; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(q6024),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            q6024 %in% c(1:2) ~ 1,
            q6024 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: q103; QTEXT: Generally speaking, do you think most people are trustworthy or not?; ROPTIONS: 1 = Most people are trustworthy + 2 = Most people are not trustworthy",
    
      # original response (as character vector)
        resp_gentrust_original = dplyr::recode(
            as.character(q103),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # i.e. 1 = distrusting answer, 0 = trusting answer
        resp_gentrust_bin_recode = case_when(
            q103 %in% c(1) ~ 0,
            q103 %in% c(2) ~ 1,
            TRUE ~ NA_real_),  
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: q609; QTEXT: In general, you would describe yourself as...;  ROPTIONS: 1 = Religious + 2 = Somewhat religious + 3 = Not religious + 4 = Other",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # "Other" (4, per the response options above) is blanked as in the earlier
      # waves; left in, it would score -0.5 on the 0-1 scale below
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(q609),
            "4" = NA_real_,
            "98" = NA_real_,
            "99" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # maps 1 -> 1, 2 -> 0.5, 3 -> 0
        resp_religiosity_recode = (3 - resp_religiosity_original)/2
    
    ) %>% 
    # keep only the harmonised variables
    select(starts_with("resp_"))
```

# Clean Round 5

```{r}
# dates: one row per country with the wave-5 fieldwork window;
# joined onto the respondent data by resp_country_common
  abb5_dates <- 
      tribble(
        ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
        "Algeria",   "2019-01-30", "2019-02-18",
        "Egypt",     "2018-10-13", "2018-11-11",
        "Iraq",      "2018-12-24", "2019-01-27",
        "Jordan",    "2018-10-15", "2018-11-01",
        "Kuwait",    "2019-04-22", "2019-06-11",
        "Lebanon",   "2018-09-21", "2018-10-19",
        "Libya",     "2019-01-19", "2019-04-11",
        "Morocco",   "2018-10-11", "2018-12-04",
        "Palestine", "2018-10-05", "2018-10-24",
        "Sudan",     "2018-09-17", "2018-12-07",
        "Tunisia",   "2018-10-29", "2018-12-04",
        "Yemen",     "2018-12-03", "2018-12-15") %>% 
      mutate(
        # build the join key the same way the respondent-level key is built, so a
        # name that countryname() rewrites still matches
        # (NOTE(review): "Palestine" appears to become "Palestinian Territories" -- confirm)
        resp_country_common = countryname(resp_country_common),
        # ISO 8601 literals parse the same in every locale; the previous "%b."
        # month abbreviations only parse when LC_TIME is English
        resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
        resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean
  clean_ab5 <- 
    arabb_5_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Arab Barometer",
        
      # round number (character vector, title case)  
        resp_round = "Wave 5",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3sYWE4L",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(country),

      # country (character vector; converts to countrycode county.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
        resp_interview_date = date) %>% 
        left_join(
          abb5_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
        resp_denomination = to_character(Q1012A), 
    
      # respondent's religion (character vector that corresponds to master list)
        resp_religion = 
          dplyr::recode(
            as.character(Q1012),
            "1" = "Muslim",
            "2" = "Christian",
            "3" = "Jewish",
            "4" = NA_character_,
            "5" = "Other religion",
            .default = NA_character_),
    
      # fix errors in resp_religion coding (assuming here that resp_denomination is more accurate)
        resp_religion = 
          case_when(
            resp_denomination %in% c("Just a Muslim", "Shia", "Sunni", "Shafi’i") ~ "Muslim",
            resp_denomination %in% c("don't know") ~ NA_character_,
            resp_denomination %in% c("Druze") ~ "Other religion",
            TRUE ~ resp_religion),
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        Q1001 = as.character(Q1001),
        Q1001GCC = as.character(Q1001GCC),
        resp_age = case_when(
          Q1001 != "99999" ~ as.character(Q1001),
          Q1001 == "99999" & Q1001GCC != "99999" ~ as.character(Q1001GCC),
          Q1001GCC == "99999" ~ NA_character_),
      
      # respondent's education level
        resp_education_original =
          dplyr::recode(
            as.character(Q1003),
            "1" = "1. No formal education [No education]",
            "2" = "2. Elementary [Primary]",
            "3" = "3. Preparatory/Basic [Primary]",
            "4" = "4. Secondary [Primary]",
            "5" = "5. Mid-level diploma/professional or technical [Primary]",
            "6" = "6. BA [College]",
            "7" = "7. MA and above [College]",
            .default = NA_character_),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female = (Q1002 == 2)*1,
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0) 
        resp_rural =
          case_when(
            # q13: settlement
            q13 == 1 ~ 0, # urban
            q13 == 2 ~ 1, # rural
            q13 == 3 ~ 0, # refugee camps
            ),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: Q602_1; QTEXT: For each of the following types of people, please tell me how much you would like having people from this group as your neighbors. People of a different religion.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Dislike [=1] + 3 = Neither dislike, nor like [=0] + 4 = Like [=0] + 5 = Strongly like [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(Q602_1),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            Q602_1 %in% c(1:2) ~ 1,
            Q602_1 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: Q602.4a; QTEXT: For each of the following types of people, please tell me how much you would like having people from this group as your neighbors. People of a different sect of Islam. [Asked of Muslims only.]; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Dislike [=1] + 3 = Neither dislike, nor like [=0] + 4 = Like [=0] + 5 = Strongly like [=0]; TARGET: Different sect, Muslim; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(Q602_4A),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            Q602_4A %in% c(1:2) ~ 1,
            Q602_4A %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 3 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_3_qinfo = "NUM: Q602.4b; QTEXT: For each of the following types of people, please tell me how much you would like having people from this group as your neighbors. People of a different Christian denomination. [Asked of Christians only.]; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Dislike [=1] + 3 = Neither dislike, nor like [=0] + 4 = Like [=0] + 5 = Strongly like [=0]; TARGET: Different sect, Christian; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_3_original = 
          dplyr::recode(
            as.character(Q602_4B),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_3_bin_recode = 
          case_when(
            Q602_4B %in% c(1:2) ~ 1,
            Q602_4B %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: q103; QTEXT: Generally speaking, would you say that 'Most people can be trusted' or that 'you must be very careful in dealing with people'?; ROPTIONS: 1 = Most people can be trusted + 2 = You must be very careful in dealing with people",
    
      # original response (as character vector)
        resp_gentrust_original = dplyr::recode(
            as.character(Q103),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = case_when(
            Q103 %in% c(1) ~ 0,
            Q103 %in% c(2) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: Q609; QTEXT: In general, you would describe yourself as...;  ROPTIONS: 1 = Religious + 2 = Somewhat religious + 3 = Not religious",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(Q609),
            "98" = NA_real_,
            "99" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
        resp_religiosity_recode = (3 - resp_religiosity_original)/2
    
    ) %>% 
    select(starts_with("resp_"))
```

# Clean Round 6

```{r}
# dates: fieldwork window (first and last interview date) per country for wave 6;
# joined onto the respondent-level data by `resp_country_common` below
  abb6_dates <- 
      tribble(
        ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
        "Algeria", "Mar. 31, 2021", "Apr. 14, 2021", 
        # Iraq's start year was previously entered as 2018, which produced a
        # three-year window; every other country in this wave was fielded in
        # Mar-Apr 2021 and the survey mode is phone due to covid, so the year
        # is corrected to 2021. TODO: confirm the exact day against the
        # wave 6 technical report.
        "Iraq", "Mar. 12, 2021", "Mar. 27, 2021",
        "Jordan", "Mar. 21, 2021", "Mar. 30, 2021",
        "Lebanon", "Mar. 25, 2021", "Apr. 3, 2021",
        "Libya", "Mar. 17, 2021", "Apr. 4, 2021",
        "Morocco", "Mar. 24, 2021", "Apr. 5, 2021",
        "Tunisia", "Mar. 06, 2021", "Mar. 16, 2021") %>% 
      mutate(
        # parse the "Mon. D, YYYY" strings into class Date
        # (note: "%b" matches abbreviated month names in the current locale)
        resp_interview_start_date = as.Date(resp_interview_start_date, format = "%b. %d, %Y"),
        resp_interview_end_date = as.Date(resp_interview_end_date, format = "%b. %d, %Y"))

# clean: derive the harmonised `resp_*` columns from the raw wave 6 release,
# attach the per-country fieldwork window, and keep only the `resp_*` columns
  clean_ab6 <- 
    arabb_6_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Arab Barometer",
        
      # round number (character vector, title case)  
        resp_round = "Wave 6",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3sYWE4L",

      # survey mode (in-person/phone)
        resp_survey_mode = "phone", # due to covid

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(COUNTRY),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
        resp_interview_date = as.Date(DATE)) %>% 
    # add resp_interview_start_date / resp_interview_end_date for each country
        left_join(
          abb6_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
        resp_religion = 
          dplyr::recode(
            as.character(Q1012),
            "1" = "Muslim",
            "2" = "Christian",
            "3" = "Other religion",
            "4" = NA_character_,
            .default = NA_character_),      

      # respondent's denomination: deliberately left blank for this wave
      # NOTE(review): this is an empty string rather than NA_character_ — confirm
      # that downstream code treats "" as missing
        resp_denomination = "", # the original variable (Q1012A) has huge missingness and unexplained values; can't be relied upon

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # NOTE(review): "100" and "99999" are treated as non-substantive codes here;
      # verify against the codebook
        resp_age =
          dplyr::recode(
            as.character(Q1001),
            "100" = NA_character_,
            "99999" = NA_character_),        
      
      # respondent's education level (original category, with the coarse level in brackets)
      # "2. Elementary" is bracketed as [Primary] to agree with the wave 5 coding
      # in this file (it was previously bracketed as [No education] here)
        resp_education_original =
          dplyr::recode(
            as.character(Q1003),
            "1" = "1. No formal education [No education]",
            "2" = "2. Elementary [Primary]",
            "3" = "3. Preparatory/Basic [Primary]",
            "4" = "4. Secondary [Primary]",
            "5" = "5. Mid-level diploma/professional or technical [Primary]",
            "6" = "6. BA [College]",
            "7" = "7. MA and above [College]",
            .default = NA_character_),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # coded explicitly so that any value other than 1/2 becomes NA instead of 0
        resp_female = 
          case_when(
            Q1002 %in% c(2) ~ 1,
            Q1002 %in% c(1) ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural = 
          case_when(
            Q13A %in% c(3) ~ 1, 
            Q13A %in% c(1, 2, 4, 5) ~ 0,
            TRUE ~ NA_real_),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: Q602.1; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care. People of a different religion.; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector; codes 98/99 set to NA)
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(Q602_1),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            Q602_1 %in% c(1:2) ~ 1,
            Q602_1 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: Q602.4A; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care. People of a different sect of Islam. [Asked of Muslims only.]; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different sect, Muslim; TYPE: Distance, neighbor",
      
      # original response (as character vector; codes 98/99 set to NA)
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(Q602_4A),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            Q602_4A %in% c(1:2) ~ 1,
            Q602_4A %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 3 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_3_qinfo = "NUM: Q602.4B; QTEXT: For each of the following types of people, please tell me whether you would like having people from this group as neighbors, dislike it, or not care. People of a different Christian denomination. [Asked of Christians only.]; ROPTIONS: 1 = Strongly dislike [=1] + 2 = Somewhat dislike [=1] + 3 = Would not care [=0] + 4 = Somewhat like [=0] + 5 = Strongly like [=0]; TARGET: Different sect, Christian; TYPE: Distance, neighbor",
      
      # original response (as character vector; codes 98/99 set to NA)
        resp_soc_dist_3_original = 
          dplyr::recode(
            as.character(Q602_4B),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_3_bin_recode = 
          case_when(
            Q602_4B %in% c(1:2) ~ 1,
            Q602_4B %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # no general-trust item is coded for this wave; the columns are kept as
      # typed NA placeholders so they match the column types used in other rounds
    
      # original question number; question text; response options
        resp_gentrust_qinfo = NA_character_,
    
      # original response (as character vector)
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA_real_,
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: Q609; QTEXT: In general, you would describe yourself as ...?;  ROPTIONS: 1 = Religious + 2 = Somewhat religious + 3 = Not religious",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(Q609),
            "98" = NA_real_,
            "99" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious): 1 -> 1, 2 -> 0.5, 3 -> 0
        resp_religiosity_recode = (3 - resp_religiosity_original)/2
    
    ) %>% 
    # drop every raw survey column; only the harmonised variables are stacked
    select(starts_with("resp_"))
```

# Stack dataframes

```{r}
# stack: bind the cleaned rounds row-wise into one data frame
# NOTE(review): round 3 is loaded at the top of this file but is not part of
# this stack — confirm the omission is intentional
  stacked <- 
    bind_rows(
      clean_ab1,
      clean_ab2,
      clean_ab4,
      clean_ab5,
      clean_ab6)
```

# Save data

```{r}
  # write the stacked rounds to disk as a single RDS file
  saveRDS(
    object = stacked,
    file = "../cleaned-data/y-22-multi-arab-barometer.rds")
```
